
########################################################################
## helper functions for simulation and applications
########################################################################
group.demean <- function(demean.id, data=NULL, ...){
  unique.id <- unique(demean.id)
  NC <- length(unique.id)
  data.tobemerged <- data.frame(matrix(NA, 0, ncol(data)))
  names(data.tobemerged) <- colnames(data)
  
  for (i in 1:NC){
    sub <- subset(data, demean.id == unique.id[i])
    colmean <- apply(sub, 2, mean)
    data.tobemerged <- rbind(data.tobemerged, sweep(sub, 2, colmean))
    cat("\n demean.id is ", unique.id[i])
  }
  out <- data.tobemerged
  return(out)
}

rmse.fn <- function(x){sqrt(sum(x^2)/length(x))}
 

gen_cov <- function(N, T, K, interaction = FALSE) {
    ## X <- matrix(rnorm(N * T * K), nrow = N * T, ncol = K)
    Sigma <- matrix(0.5, nrow = K, ncol = K)
    diag(Sigma) <- 1
    X <- MASS::mvrnorm(N * T, mu = rep(0, K), Sigma = Sigma)
    colnames(X) <- paste("X_", 1:K, sep = "")
    
    if(interaction){
        intr <- vname <- list()
        count <- 1
        for (i in 1:(K-1)) {
            for (j in (i+1):K) {
                intr[[count]] <- X[,i] * X[,j]
                vname[[count]] <- paste("K", i, j, sep = "_")
                count <- count + 1
            }
        }
        
        Xij <- do.call("cbind", intr)
        colnames(Xij) <- unlist(vname)
        
        X <- cbind(X, Xij)
    }
  return(X)
}
  
gen_beta_int <- function(Kint, mu, pattern, n_break, tau = 1) {
  if (n_break == 0) {
    beta_int0 <- beta_int1 <- rnorm(Kint, mu, 1)
  } else {
      if (pattern == 1) {
          ## everything is non-sparse
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- rnorm(Kint, mu * -tau, 1)
          
      } else if (pattern == 2) {
          ## 1/2 is non-sparse
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 2), mu * -tau, 1), rep(0, (Kint - round(Kint/2))))
          
      } else {
          ## 1/3 is non-sparse
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 3), mu * -tau, 1), rep(0, (Kint - round(Kint / 3))))
      }
      
  }
  
  beta_int <- list(beta_int0, beta_int1)
  return(beta_int)
}

gen_beta <- function(Kint, mu, pattern, n_break, tau = 2) {
  if (n_break == 0) {
      beta_int <- rnorm(Kint, mu, 1)
  } else if(n_break == 1){
      if (pattern == 1) {
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- rnorm(Kint, mu * -tau, 1)
          
      } else if (pattern == 2) {
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 2), mu * -tau, 1), rep(0, (Kint - round(Kint/2))))
          
      } else{
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 3), mu * -tau, 1), rep(0, (Kint - round(Kint / 3))))
      }
      beta_int <- list(beta_int0, beta_int1)
  }else if (n_break == 2){
      if (pattern == 1) {
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- rnorm(Kint, mu * -tau, 1)
          beta_int2 <- rnorm(Kint, mu, 1)  
      } else if (pattern == 2) {
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 2), mu * -tau, 1), rep(0, (Kint - round(Kint/2))))
          beta_int2 <- c(rnorm(round(Kint / 2), mu, 1), rep(0, (Kint - round(Kint/2))))
          
      } else{
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 3), mu * -tau, 1), rep(0, (Kint - round(Kint / 3))))
          beta_int2 <- c(rnorm(round(Kint / 3), mu, 1), rep(0, (Kint - round(Kint / 3))))
      }
      beta_int <- list(beta_int0, beta_int1, beta_int2)
  }else{
      
      stop("n_break should be smaller than 3!")
  }
  return(beta_int)
}




## panel data generating
dgp.break <- function(x, ## exogenous covariate matrix
                      N, ## number of groups
                      T, ## number of times
                      B, ## number of breaks
                      Q, ## number of random varing coefficients
                      true.beta,    # this should be a list
                      true.sigma,   # this should be a list
                      true.D,       # this should be a list
                      break.point, 
                      break.sigma,
                      fixed=FALSE, 
                      print=FALSE){
    
    ##if(fixed){
    X <- x
    ## }else{
    ##    X <- cbind(1, x);
    ## }
    
    mu <- rep(break.point, N)
    sigma <- rep(break.sigma, N)
    ruler <- c(1:T)
    
    ## no break case
    if(B == 0){
        NT <- N*T
        y <- rep(NA, NT)
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);
        W <-  rep(NA, NT)

        ## true beta over time
        true.beta.time <- matrix(NA, K, T)
        for(t in 1:T){
            true.beta.time[, t] <- true.beta
        }
       ## 
        if(fixed){
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                true.V <- true.sigma*diag(ni) 
                true.mean <- Xi%*%true.beta
                y[j:(j+ni-1)] <- true.mean + chol(true.V)%*%rnorm(T) 
                j <- j + ni
            }
        }else{
            W <- X[,1:Q, drop = FALSE]; 
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V <- true.sigma*diag(ni) + Wi%*%true.D%*%t(Wi)
                true.mean <- Xi%*%true.beta
                y[j:(j+ni-1)] <- true.mean + chol(true.V)%*%rnorm(T) 
                j <- j + ni
                ## cat("j = ", j, "\n")
            }
        }
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                time.dummy.mat[t, i] <- ifelse(t < mu[i], i, i+.5) 
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
        
    }else if(B == 1){
        ## panel case
        NT <- N*T
        W <- X[,1:Q, drop = FALSE]; 
        y <- rep(NA, NT)
        
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);   
        ## Q  <-  ncol(W)
        
        ## true beta over time
        true.beta.time <- matrix(NA, K, T)
        s1 <- T - break.point[1]
        s2 <- T - s1
        true.state <- c(rep(1, s1), rep(2, s2))
        for(t in 1:T){
            true.beta.time[, t] <- true.beta[[true.state[t]]] 
        }

        
        ## compute the weight for the break
        W.mat <- matrix(NA, T, N)
        for (i in 1:N){
            W.mat[, i] <- pnorm((ruler-mu[i])/sigma[i])
        }
        Weight <- as.vector(W.mat)
        
        ## data generating by weighting two means and variances
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                ## Wi <- W[j:(j+ni-1), ]
                true.V1 <- true.sigma[[1]]*diag(ni) ## + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- true.sigma[[2]]*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <- (1-weight)*true.mean1 + (1-weight)*chol(true.V1)%*%rnorm(T) +
                    weight*true.mean2 + weight*chol(true.V2)%*%rnorm(T)  + rep(W.fixed[i], ni)
                j <- j + ni
            }
            
        }else{
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V1 <- true.sigma[[1]]*diag(ni) + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- true.sigma[[2]]*diag(ni) + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <- (1-weight)*true.mean1 + (1-weight)*chol(true.V1)%*%rnorm(T) +
                    weight*true.mean2 + weight*chol(true.V2)%*%rnorm(T) 
                j <- j + ni
            }
        }
        if (print==TRUE){
            par(mfrow=c(1,2))
            plot(pnorm((ruler - break.point)/break.sigma))
            plot((1- Weight[1:T])*true.beta1[2] + (Weight[1:T])*true.beta2[2])
        }
        
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                time.dummy.mat[t, i] <- ifelse(t < mu[i], i, i+.5) 
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
    }else{
        ## B == 2
        NT <- N*T
        W <- X[,1:Q, drop = FALSE]; 
        y <- rep(NA, NT)
        
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);   
        ## Q  <-  ncol(W)

        true.beta.time <- matrix(NA, K, T)
        s1 <- T - break.point[2]
        s2 <- break.point[2] - break.point[1]
        s3 <- T - s1 - s2
        true.state <- c(rep(1, s1), rep(2, s2), rep(3, s3))
        for(t in 1:T){
            true.beta.time[, t] <- true.beta[[true.state[t]]] 
        }
        ## ex (17, 33)
        ## mu <- rep(break.point, N)
        ## sigma <- rep(break.sigma, N)
        sigma <- rep(1, 2)
        ruler <- c(1:T)
        ## compute the weight for the break
        ## W.mat <- matrix(NA, T, N)
        ## for (i in 1:N){
        ##     W.mat[, i] <- pnorm((ruler-mu[i])/sigma[i])
        ## }
        ## Weight <- as.vector(W.mat)
        weight1 <- 1- pnorm((ruler-break.point[1])/sigma[1])
        weight2 <- (1- pnorm((ruler-break.point[2])/sigma[2])) - weight1
        weight3 <- 1 - weight1 - weight2
        ## data generating by weighting two means and variances
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                ## Wi <- W[j:(j+ni-1), ]
                true.V1 <- true.sigma[[1]]*diag(ni) ## + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- true.sigma[[2]]*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.V3 <- true.sigma[[3]]*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                true.mean3 <- Xi%*%true.beta[[3]]
                ## weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <-
                    weight1*true.mean1 + weight1*chol(true.V1)%*%rnorm(ni) +
                    weight2*true.mean2 + weight2*chol(true.V2)%*%rnorm(ni) +
                    weight3*true.mean3 + weight3*chol(true.V3)%*%rnorm(ni)  +
                    rep(W.fixed[i], ni)
                j <- j + ni
            }
            
        }else{
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V1 <- true.sigma[[1]]*diag(ni) + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- true.sigma[[2]]*diag(ni) + Wi%*%true.D[[2]]%*%t(Wi)
                true.V3 <- true.sigma[[3]]*diag(ni) + Wi%*%true.D[[3]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                true.mean3 <- Xi%*%true.beta[[3]]
                y[j:(j+ni-1)] <-
                    weight1*true.mean1 + weight1*chol(true.V1)%*%rnorm(ni) + 
                    weight2*true.mean2 + weight2*chol(true.V2)%*%rnorm(ni) +
                    weight3*true.mean3 + weight3*chol(true.V3)%*%rnorm(ni)
                j <- j + ni
            }
        }
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                if(t < break.point[1]){
                    time.dummy.mat[t, i] <- i
                }else if(t < break.point[2]){
                    time.dummy.mat[t, i] <- i + 0.25
                }else{
                    time.dummy.mat[t, i] <- i + .5 
                }
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
        
    ## }else{
    ##    stop("break must be smaller than 2!")
    }
    return(out)
}

    
    
## panel data with common shocks generating
dgp.break.common <- function(x, ## exogenous covariate matrix
                      N, ## number of groups
                      T, ## number of times
                      B, ## number of breaks
                      Q, ## number of random varing coefficients
                      true.beta,    # this should be a list
                      true.sigma,   # this should be a list
                      true.D,       # this should be a list
                      break.point, 
                      break.sigma,
                      fixed=FALSE, 
                      print=FALSE){
    
    ##if(fixed){
    X <- x
    ## }else{
    ##    X <- cbind(1, x);
    ## }
    
    mu <- rep(break.point, N)
    sigma <- rep(break.sigma, N)
    ruler <- c(1:T)
    
    ## no break case
    if(B == 0){
        NT <- N*T
        y <- rep(NA, NT)
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);
        W <-  rep(NA, NT)

        ## true beta over time
        true.beta.time <- matrix(NA, K, T)
        for(t in 1:T){
            true.beta.time[, t] <- true.beta
        }
       ## 
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                true.V <- true.sigma*diag(ni) 
                true.mean <- Xi%*%true.beta
                y[j:(j+ni-1)] <- true.mean + chol(true.V)%*%rnorm(T) 
                j <- j + ni
            }
            ## individual effects
            y <- y + W.fixed[id]
 
        }else{
            W <- X[,1:Q, drop = FALSE]; 
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V <- true.sigma*diag(ni) + Wi%*%true.D%*%t(Wi)
                true.mean <- Xi%*%true.beta
                y[j:(j+ni-1)] <- true.mean + chol(true.V)%*%rnorm(T) 
                j <- j + ni
                ## cat("j = ", j, "\n")
            }
        }
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                time.dummy.mat[t, i] <- ifelse(t < mu[i], i, i+.5) 
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
        
    }else if(B == 1){
        ## panel case
        NT <- N*T
        W <- X[,1:Q, drop = FALSE]; 
        y <- rep(NA, NT)
        ## time id
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);   
        ## Q  <-  ncol(W)
        
        ## true beta over time
        true.beta.time <- matrix(NA, K, T)
        s1 <- T - break.point[1]
        s2 <- T - s1
        true.state <- c(rep(1, s1), rep(2, s2))
        for(t in 1:T){
            true.beta.time[, t] <- true.beta[[true.state[t]]] 
        }

        
        ## compute the weight for the break
        W.mat <- matrix(NA, T, N)
        for (i in 1:N){
            W.mat[, i] <- pnorm((ruler-mu[i])/sigma[i])
        }
        Weight <- as.vector(W.mat)
        
        ## data generating by weighting two means and variances
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                ## Wi <- W[j:(j+ni-1), ]
                ## runif(1) is a random contemporaneous shock
                true.V1 <- (true.sigma[[1]] + runif(1))*diag(ni) ## + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- (true.sigma[[2]] + runif(1))*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <- (1-weight)*true.mean1 + (1-weight)*chol(true.V1)%*%rnorm(T) +
                    weight*true.mean2 + weight*chol(true.V2)%*%rnorm(T)  + rep(W.fixed[i], ni)
                j <- j + ni
            }
            ## individual effects
            y <- y + W.fixed[id]
            
        }else{
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V1 <-  (true.sigma[[1]] + runif(1))*diag(ni) + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <-  (true.sigma[[2]] + runif(1))*diag(ni) + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <- (1-weight)*true.mean1 + (1-weight)*chol(true.V1)%*%rnorm(T) +
                    weight*true.mean2 + weight*chol(true.V2)%*%rnorm(T) 
                j <- j + ni
            }
        }
        if (print==TRUE){
            par(mfrow=c(1,2))
            plot(pnorm((ruler - break.point)/break.sigma))
            plot((1- Weight[1:T])*true.beta1[2] + (Weight[1:T])*true.beta2[2])
        }
        
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                time.dummy.mat[t, i] <- ifelse(t < mu[i], i, i+.5) 
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
    }else{
        ## B == 2
        NT <- N*T
        W <- X[,1:Q, drop = FALSE]; 
        y <- rep(NA, NT)
        
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);   
        ## Q  <-  ncol(W)

        true.beta.time <- matrix(NA, K, T)
        s1 <- T - break.point[2]
        s2 <- break.point[2] - break.point[1]
        s3 <- T - s1 - s2
        true.state <- c(rep(1, s1), rep(2, s2), rep(3, s3))
        for(t in 1:T){
            true.beta.time[, t] <- true.beta[[true.state[t]]] 
        }
        ## ex (17, 33)
        ## mu <- rep(break.point, N)
        ## sigma <- rep(break.sigma, N)
        sigma <- rep(1, 2)
        ruler <- c(1:T)
        ## compute the weight for the break
        ## W.mat <- matrix(NA, T, N)
        ## for (i in 1:N){
        ##     W.mat[, i] <- pnorm((ruler-mu[i])/sigma[i])
        ## }
        ## Weight <- as.vector(W.mat)
        weight1 <- 1- pnorm((ruler-break.point[1])/sigma[1])
        weight2 <- (1- pnorm((ruler-break.point[2])/sigma[2])) - weight1
        weight3 <- 1 - weight1 - weight2
        ## data generating by weighting two means and variances
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                ## Wi <- W[j:(j+ni-1), ]
                true.V1 <-  (true.sigma[[1]] + runif(1))*diag(ni) ## + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <-  (true.sigma[[2]] + runif(1))*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.V3 <-  (true.sigma[[3]] + runif(1))*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                true.mean3 <- Xi%*%true.beta[[3]]
                ## weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <-
                    weight1*true.mean1 + weight1*chol(true.V1)%*%rnorm(ni) +
                    weight2*true.mean2 + weight2*chol(true.V2)%*%rnorm(ni) +
                    weight3*true.mean3 + weight3*chol(true.V3)%*%rnorm(ni)  +
                    rep(W.fixed[i], ni)
                j <- j + ni
            }
            ## individual effects
            y <- y + W.fixed[id]
            
        }else{
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V1 <-  (true.sigma[[1]] + runif(1))*diag(ni) + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <-  (true.sigma[[2]] + runif(1))*diag(ni) + Wi%*%true.D[[2]]%*%t(Wi)
                true.V3 <-  (true.sigma[[3]] + runif(1))*diag(ni) + Wi%*%true.D[[3]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                true.mean3 <- Xi%*%true.beta[[3]]
                y[j:(j+ni-1)] <-
                    weight1*true.mean1 + weight1*chol(true.V1)%*%rnorm(ni) + 
                    weight2*true.mean2 + weight2*chol(true.V2)%*%rnorm(ni) +
                    weight3*true.mean3 + weight3*chol(true.V3)%*%rnorm(ni)
                j <- j + ni
            }
        }
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                if(t < break.point[1]){
                    time.dummy.mat[t, i] <- i
                }else if(t < break.point[2]){
                    time.dummy.mat[t, i] <- i + 0.25
                }else{
                    time.dummy.mat[t, i] <- i + .5 
                }
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
        
    ## }else{
    ##    stop("break must be smaller than 2!")
    }
    return(out)
}


########################################################################
## mother code to check model-fit
########################################################################
## formula.p=formula; data.p = agl; mcmc.p=mcmc;burn.p=burn; thin.p=1; verbose.p=verbose;
## index.p = c('country', 'year');
## effect.p = 'time'; model.p = 'within';standardize.p = FALSE
## Waic.p = TRUE; marginal.p = FALSE; alpha.MH.p = TRUE
## verbose.p=verbose; break.upper = 2;
## interaction.p = 1
BridgeFixedPanelPool <- function(formula.p, data.p, index.p = c('country', 'year'),
                                 model.p = 'within', effect.p = 'time',
                                 Waic.p = TRUE, marginal.p = FALSE, standardize.p = FALSE, 
                                 interaction.p = 1, alpha.MH.p = TRUE,
                                 mcmc.p=mcmc, burn.p=burn, thin.p=1, verbose.p=verbose,
                                 break.upper = 1){
    out <- as.list(rep(NA, break.upper + 1))
    for(i in 1:(break.upper+1)){
        out[[i]] <- BridgeFixedPanelHybrid(formula=formula.p, data = data.p,
                                           interaction = interaction.p,
                                           alpha.MH = alpha.MH.p, standardize = standardize.p,
                                           index = index.p, mcmc=mcmc.p, burn=burn.p,
                                           thin=thin.p, verbose=verbose.p,
                                           model = model.p, effect = effect.p, n.break = i-1,
                                           Waic = Waic.p, marginal=marginal.p)
    }
    if(Waic.p){
        waic <- WaicCompare(out)$waic
        cat("\n Waic is ", waic, "\n")
        detected <- suppressWarnings(which.min(waic))
        cat("\nDetected break number is ", detected - 1, "!\n")
        attr(out, "Waic") <- WaicCompare(out)
    }else if(marginal.p){
        cat("\n Marginal is ", MarginalCompare(out)[1:(break.upper+1)], "\n")
        detected <- suppressWarnings(which.min(MarginalCompare(out)[1:(break.upper+1)]))
        cat("\nDetected break number is ", detected - 1, "!\n")
        attr(out, "Marginal") <- MarginalCompare(out)
    }else{
        cat("We have nothing to report!")
    }
    return(out)
}

## merge three plm outputs
three.plm <- function(formula, data, df.pre, df.post, ## breakpoint,
                      model="within", effect="twoways",
                      index = c("uncode", "year")){
    ## p.df <- pdata.frame(subset(x, imp == 1), index = c("uncode", "year"), drop.index = F)
    ## p1.df <- pdata.frame(subset(x, imp ==1 & year < 1960), index = c("uncode", "year"), drop.index = F)
    ## p2.df <- pdata.frame(subset(x, imp == 1 & year >= 1960), index = c("uncode", "year"), drop.index = F)
    p.df <- plm:::pdata.frame(data, index = index, drop.index = FALSE)
    p1.df <- plm:::pdata.frame(df.pre, index = index, drop.index = FALSE)
    p2.df <- plm:::pdata.frame(df.post, index = index, drop.index = FALSE)
   
    pm1 <- plm(formula, data = p.df, model=model, effect=effect)
    pm2 <- plm(formula, data = p1.df, model=model, effect=effect)
    pm3 <- plm(formula, data = p2.df, model=model, effect=effect)
    
    ## summary(pm1, robust = TRUE)
    ## summary(pm2, robust = TRUE)
    ## summary(pm3, robust = TRUE)
    out <- list(pm1, pm2, pm3)
    return(out)
}


    
all.zero <- function(x){sum(x) + prod(x) == 0}
summary.dss <- function(output, variable.names=TRUE){
    coefs <- attr(output, "hybrid")
    coefs <- coefs[-which(apply(coefs, 1, all.zero)),]
    varnames <- str_replace(attr(output, "xnames")[,1], "_regime1","")
    var.selected <- which(is.element(varnames, rownames(coefs)))
    beta.selected <- c(sapply(1:ns, function(j){paste0("beta", var.selected, "_regime", j)}))
    new.output <- output[, beta.selected]
    if(variable.names){
        names.selected <- c(sapply(1:ns, function(j){paste0(varnames[var.selected], "_regime", j)}))
        colnames(new.output) <- names.selected
    }
    return(summary(new.output))
}
## summary.dss(output)
group.demean <- function(demean.id, data=NULL, ...){
  unique.id <- unique(demean.id)
  NC <- length(unique.id)
  data.tobemerged <- data.frame(matrix(NA, 0, ncol(data)))
  names(data.tobemerged) <- colnames(data)
  
  for (i in 1:NC){
    sub <- subset(data, demean.id == unique.id[i])
    colmean <- apply(sub, 2, mean)
    data.tobemerged <- rbind(data.tobemerged, sweep(sub, 2, colmean))
    cat("\n demean.id is ", unique.id[i])
  }
  out <- data.tobemerged
  return(out)
}

rmse.fn <- function(x){sqrt(sum(x^2)/length(x))}
 

gen_cov <- function(N, T, K, interaction = FALSE) {
    ## X <- matrix(rnorm(N * T * K), nrow = N * T, ncol = K)
    Sigma <- matrix(0.5, nrow = K, ncol = K)
    diag(Sigma) <- 1
    X <- MASS::mvrnorm(N * T, mu = rep(0, K), Sigma = Sigma)
    colnames(X) <- paste("X_", 1:K, sep = "")
    
    if(interaction){
        intr <- vname <- list()
        count <- 1
        for (i in 1:(K-1)) {
            for (j in (i+1):K) {
                intr[[count]] <- X[,i] * X[,j]
                vname[[count]] <- paste("K", i, j, sep = "_")
                count <- count + 1
            }
        }
        
        Xij <- do.call("cbind", intr)
        colnames(Xij) <- unlist(vname)
        
        X <- cbind(X, Xij)
    }
  return(X)
}
  
gen_beta_int <- function(Kint, mu, pattern, n_break, tau = 1) {
  if (n_break == 0) {
    beta_int0 <- beta_int1 <- rnorm(Kint, mu, 1)
  } else {
      if (pattern == 1) {
          ## everything is non-sparse
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- rnorm(Kint, mu * -tau, 1)
          
      } else if (pattern == 2) {
          ## 1/2 is non-sparse
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 2), mu * -tau, 1), rep(0, (Kint - round(Kint/2))))
          
      } else {
          ## 1/3 is non-sparse
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 3), mu * -tau, 1), rep(0, (Kint - round(Kint / 3))))
      }
      
  }
  
  beta_int <- list(beta_int0, beta_int1)
  return(beta_int)
}

gen_beta <- function(Kint, mu, pattern, n_break, tau = 2) {
  if (n_break == 0) {
      beta_int <- rnorm(Kint, mu, 1)
  } else if(n_break == 1){
      if (pattern == 1) {
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- rnorm(Kint, mu * -tau, 1)
          
      } else if (pattern == 2) {
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 2), mu * -tau, 1), rep(0, (Kint - round(Kint/2))))
          
      } else{
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 3), mu * -tau, 1), rep(0, (Kint - round(Kint / 3))))
      }
      beta_int <- list(beta_int0, beta_int1)
  }else if (n_break == 2){
      if (pattern == 1) {
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- rnorm(Kint, mu * -tau, 1)
          beta_int2 <- rnorm(Kint, mu, 1)  
      } else if (pattern == 2) {
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 2), mu * -tau, 1), rep(0, (Kint - round(Kint/2))))
          beta_int2 <- c(rnorm(round(Kint / 2), mu, 1), rep(0, (Kint - round(Kint/2))))
          
      } else{
          beta_int0 <- rnorm(Kint, mu, 1)
          beta_int1 <- c(rnorm(round(Kint / 3), mu * -tau, 1), rep(0, (Kint - round(Kint / 3))))
          beta_int2 <- c(rnorm(round(Kint / 3), mu, 1), rep(0, (Kint - round(Kint / 3))))
      }
      beta_int <- list(beta_int0, beta_int1, beta_int2)
  }else{
      
      stop("n_break should be smaller than 3!")
  }
  return(beta_int)
}




## panel data generating
dgp.break <- function(x, ## exogenous covariate matrix
                      N, ## number of groups
                      T, ## number of times
                      B, ## number of breaks
                      Q, ## number of random varing coefficients
                      true.beta,    # this should be a list
                      true.sigma,   # this should be a list
                      true.D,       # this should be a list
                      break.point, 
                      break.sigma,
                      fixed=FALSE, 
                      print=FALSE){
    
    ##if(fixed){
    X <- x
    ## }else{
    ##    X <- cbind(1, x);
    ## }
    
    mu <- rep(break.point, N)
    sigma <- rep(break.sigma, N)
    ruler <- c(1:T)
    
    ## no break case
    if(B == 0){
        NT <- N*T
        y <- rep(NA, NT)
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);
        W <-  rep(NA, NT)

        ## true beta over time
        true.beta.time <- matrix(NA, K, T)
        for(t in 1:T){
            true.beta.time[, t] <- true.beta
        }
       ## 
        if(fixed){
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                true.V <- true.sigma*diag(ni) 
                true.mean <- Xi%*%true.beta
                y[j:(j+ni-1)] <- true.mean + chol(true.V)%*%rnorm(T) 
                j <- j + ni
            }
        }else{
            W <- X[,1:Q, drop = FALSE]; 
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V <- true.sigma*diag(ni) + Wi%*%true.D%*%t(Wi)
                true.mean <- Xi%*%true.beta
                y[j:(j+ni-1)] <- true.mean + chol(true.V)%*%rnorm(T) 
                j <- j + ni
                ## cat("j = ", j, "\n")
            }
        }
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                time.dummy.mat[t, i] <- ifelse(t < mu[i], i, i+.5) 
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
        
    }else if(B == 1){
        ## panel case
        NT <- N*T
        W <- X[,1:Q, drop = FALSE]; 
        y <- rep(NA, NT)
        
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);   
        ## Q  <-  ncol(W)
        
        ## true beta over time
        true.beta.time <- matrix(NA, K, T)
        s1 <- T - break.point[1]
        s2 <- T - s1
        true.state <- c(rep(1, s1), rep(2, s2))
        for(t in 1:T){
            true.beta.time[, t] <- true.beta[[true.state[t]]] 
        }

        
        ## compute the weight for the break
        W.mat <- matrix(NA, T, N)
        for (i in 1:N){
            W.mat[, i] <- pnorm((ruler-mu[i])/sigma[i])
        }
        Weight <- as.vector(W.mat)
        
        ## data generating by weighting two means and variances
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                ## Wi <- W[j:(j+ni-1), ]
                true.V1 <- true.sigma[[1]]*diag(ni) ## + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- true.sigma[[2]]*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <- (1-weight)*true.mean1 + (1-weight)*chol(true.V1)%*%rnorm(T) +
                    weight*true.mean2 + weight*chol(true.V2)%*%rnorm(T)  + rep(W.fixed[i], ni)
                j <- j + ni
            }
            
        }else{
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V1 <- true.sigma[[1]]*diag(ni) + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- true.sigma[[2]]*diag(ni) + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <- (1-weight)*true.mean1 + (1-weight)*chol(true.V1)%*%rnorm(T) +
                    weight*true.mean2 + weight*chol(true.V2)%*%rnorm(T) 
                j <- j + ni
            }
        }
        if (print==TRUE){
            par(mfrow=c(1,2))
            plot(pnorm((ruler - break.point)/break.sigma))
            plot((1- Weight[1:T])*true.beta1[2] + (Weight[1:T])*true.beta2[2])
        }
        
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                time.dummy.mat[t, i] <- ifelse(t < mu[i], i, i+.5) 
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
    }else{
        ## B == 2
        NT <- N*T
        W <- X[,1:Q, drop = FALSE]; 
        y <- rep(NA, NT)
        
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);   
        ## Q  <-  ncol(W)

        true.beta.time <- matrix(NA, K, T)
        s1 <- T - break.point[2]
        s2 <- break.point[2] - break.point[1]
        s3 <- T - s1 - s2
        true.state <- c(rep(1, s1), rep(2, s2), rep(3, s3))
        for(t in 1:T){
            true.beta.time[, t] <- true.beta[[true.state[t]]] 
        }
        ## ex (17, 33)
        ## mu <- rep(break.point, N)
        ## sigma <- rep(break.sigma, N)
        sigma <- rep(1, 2)
        ruler <- c(1:T)
        ## compute the weight for the break
        ## W.mat <- matrix(NA, T, N)
        ## for (i in 1:N){
        ##     W.mat[, i] <- pnorm((ruler-mu[i])/sigma[i])
        ## }
        ## Weight <- as.vector(W.mat)
        weight1 <- 1- pnorm((ruler-break.point[1])/sigma[1])
        weight2 <- (1- pnorm((ruler-break.point[2])/sigma[2])) - weight1
        weight3 <- 1 - weight1 - weight2
        ## data generating by weighting two means and variances
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                ## Wi <- W[j:(j+ni-1), ]
                true.V1 <- true.sigma[[1]]*diag(ni) ## + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- true.sigma[[2]]*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.V3 <- true.sigma[[3]]*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                true.mean3 <- Xi%*%true.beta[[3]]
                ## weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <-
                    weight1*true.mean1 + weight1*chol(true.V1)%*%rnorm(ni) +
                    weight2*true.mean2 + weight2*chol(true.V2)%*%rnorm(ni) +
                    weight3*true.mean3 + weight3*chol(true.V3)%*%rnorm(ni)  +
                    rep(W.fixed[i], ni)
                j <- j + ni
            }
            
        }else{
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V1 <- true.sigma[[1]]*diag(ni) + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- true.sigma[[2]]*diag(ni) + Wi%*%true.D[[2]]%*%t(Wi)
                true.V3 <- true.sigma[[3]]*diag(ni) + Wi%*%true.D[[3]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                true.mean3 <- Xi%*%true.beta[[3]]
                y[j:(j+ni-1)] <-
                    weight1*true.mean1 + weight1*chol(true.V1)%*%rnorm(ni) + 
                    weight2*true.mean2 + weight2*chol(true.V2)%*%rnorm(ni) +
                    weight3*true.mean3 + weight3*chol(true.V3)%*%rnorm(ni)
                j <- j + ni
            }
        }
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                if(t < break.point[1]){
                    time.dummy.mat[t, i] <- i
                }else if(t < break.point[2]){
                    time.dummy.mat[t, i] <- i + 0.25
                }else{
                    time.dummy.mat[t, i] <- i + .5 
                }
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
        
    ## }else{
    ##    stop("break must be smaller than 2!")
    }
    return(out)
}

    
    
## panel data with common shocks generating
dgp.break.common <- function(x, ## exogenous covariate matrix
                      N, ## number of groups
                      T, ## number of times
                      B, ## number of breaks
                      Q, ## number of random varing coefficients
                      true.beta,    # this should be a list
                      true.sigma,   # this should be a list
                      true.D,       # this should be a list
                      break.point, 
                      break.sigma,
                      fixed=FALSE, 
                      print=FALSE){
    
    ##if(fixed){
    X <- x
    ## }else{
    ##    X <- cbind(1, x);
    ## }
    
    mu <- rep(break.point, N)
    sigma <- rep(break.sigma, N)
    ruler <- c(1:T)
    
    ## no break case
    if(B == 0){
        NT <- N*T
        y <- rep(NA, NT)
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);
        W <-  rep(NA, NT)

        ## true beta over time
        true.beta.time <- matrix(NA, K, T)
        for(t in 1:T){
            true.beta.time[, t] <- true.beta
        }
       ## 
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                true.V <- true.sigma*diag(ni) 
                true.mean <- Xi%*%true.beta
                y[j:(j+ni-1)] <- true.mean + chol(true.V)%*%rnorm(T) 
                j <- j + ni
            }
            ## individual effects
            y <- y + W.fixed[id]
 
        }else{
            W <- X[,1:Q, drop = FALSE]; 
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V <- true.sigma*diag(ni) + Wi%*%true.D%*%t(Wi)
                true.mean <- Xi%*%true.beta
                y[j:(j+ni-1)] <- true.mean + chol(true.V)%*%rnorm(T) 
                j <- j + ni
                ## cat("j = ", j, "\n")
            }
        }
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                time.dummy.mat[t, i] <- ifelse(t < mu[i], i, i+.5) 
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
        
    }else if(B == 1){
        ## panel case
        NT <- N*T
        W <- X[,1:Q, drop = FALSE]; 
        y <- rep(NA, NT)
        ## time id
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);   
        ## Q  <-  ncol(W)
        
        ## true beta over time
        true.beta.time <- matrix(NA, K, T)
        s1 <- T - break.point[1]
        s2 <- T - s1
        true.state <- c(rep(1, s1), rep(2, s2))
        for(t in 1:T){
            true.beta.time[, t] <- true.beta[[true.state[t]]] 
        }

        
        ## compute the weight for the break
        W.mat <- matrix(NA, T, N)
        for (i in 1:N){
            W.mat[, i] <- pnorm((ruler-mu[i])/sigma[i])
        }
        Weight <- as.vector(W.mat)
        
        ## data generating by weighting two means and variances
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                ## Wi <- W[j:(j+ni-1), ]
                ## runif(1) is a random contemporaneous shock
                true.V1 <- (true.sigma[[1]] + runif(1))*diag(ni) ## + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <- (true.sigma[[2]] + runif(1))*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <- (1-weight)*true.mean1 + (1-weight)*chol(true.V1)%*%rnorm(T) +
                    weight*true.mean2 + weight*chol(true.V2)%*%rnorm(T)  + rep(W.fixed[i], ni)
                j <- j + ni
            }
            ## individual effects
            y <- y + W.fixed[id]
            
        }else{
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V1 <-  (true.sigma[[1]] + runif(1))*diag(ni) + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <-  (true.sigma[[2]] + runif(1))*diag(ni) + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <- (1-weight)*true.mean1 + (1-weight)*chol(true.V1)%*%rnorm(T) +
                    weight*true.mean2 + weight*chol(true.V2)%*%rnorm(T) 
                j <- j + ni
            }
        }
        if (print==TRUE){
            par(mfrow=c(1,2))
            plot(pnorm((ruler - break.point)/break.sigma))
            plot((1- Weight[1:T])*true.beta1[2] + (Weight[1:T])*true.beta2[2])
        }
        
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                time.dummy.mat[t, i] <- ifelse(t < mu[i], i, i+.5) 
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
    }else{
        ## B == 2
        NT <- N*T
        W <- X[,1:Q, drop = FALSE]; 
        y <- rep(NA, NT)
        
        id <-  rep(1:N, each=NT/N)
        K  <-  ncol(X);   
        ## Q  <-  ncol(W)

        true.beta.time <- matrix(NA, K, T)
        s1 <- T - break.point[2]
        s2 <- break.point[2] - break.point[1]
        s3 <- T - s1 - s2
        true.state <- c(rep(1, s1), rep(2, s2), rep(3, s3))
        for(t in 1:T){
            true.beta.time[, t] <- true.beta[[true.state[t]]] 
        }
        ## ex (17, 33)
        ## mu <- rep(break.point, N)
        ## sigma <- rep(break.sigma, N)
        sigma <- rep(1, 2)
        ruler <- c(1:T)
        ## compute the weight for the break
        ## W.mat <- matrix(NA, T, N)
        ## for (i in 1:N){
        ##     W.mat[, i] <- pnorm((ruler-mu[i])/sigma[i])
        ## }
        ## Weight <- as.vector(W.mat)
        weight1 <- 1- pnorm((ruler-break.point[1])/sigma[1])
        weight2 <- (1- pnorm((ruler-break.point[2])/sigma[2])) - weight1
        weight3 <- 1 - weight1 - weight2
        ## data generating by weighting two means and variances
        if(fixed){
            W.fixed <- rnorm(N, 0, 5)
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                ## Wi <- W[j:(j+ni-1), ]
                true.V1 <-  (true.sigma[[1]] + runif(1))*diag(ni) ## + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <-  (true.sigma[[2]] + runif(1))*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.V3 <-  (true.sigma[[3]] + runif(1))*diag(ni) ## + Wi%*%true.D[[2]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                true.mean3 <- Xi%*%true.beta[[3]]
                ## weight <- Weight[j:(j+ni-1)]
                y[j:(j+ni-1)] <-
                    weight1*true.mean1 + weight1*chol(true.V1)%*%rnorm(ni) +
                    weight2*true.mean2 + weight2*chol(true.V2)%*%rnorm(ni) +
                    weight3*true.mean3 + weight3*chol(true.V3)%*%rnorm(ni)  +
                    rep(W.fixed[i], ni)
                j <- j + ni
            }
            ## individual effects
            y <- y + W.fixed[id]
            
        }else{
            ## random effects
            j = 1
            for (i in 1:N){
                ni <- length(id[unique(id)==i]) 
                Xi <- X[j:(j+ni-1), ]
                Wi <- W[j:(j+ni-1), ]
                true.V1 <-  (true.sigma[[1]] + runif(1))*diag(ni) + Wi%*%true.D[[1]]%*%t(Wi)
                true.V2 <-  (true.sigma[[2]] + runif(1))*diag(ni) + Wi%*%true.D[[2]]%*%t(Wi)
                true.V3 <-  (true.sigma[[3]] + runif(1))*diag(ni) + Wi%*%true.D[[3]]%*%t(Wi)
                true.mean1 <- Xi%*%true.beta[[1]]
                true.mean2 <- Xi%*%true.beta[[2]]
                true.mean3 <- Xi%*%true.beta[[3]]
                y[j:(j+ni-1)] <-
                    weight1*true.mean1 + weight1*chol(true.V1)%*%rnorm(ni) + 
                    weight2*true.mean2 + weight2*chol(true.V2)%*%rnorm(ni) +
                    weight3*true.mean3 + weight3*chol(true.V3)%*%rnorm(ni)
                j <- j + ni
            }
        }
        ## make id and inputs for HMMpack
        subject.id <- c(rep(1:N, each=T))
        time.id    <- c(rep(1:T, N))
        time.dummy.mat <- matrix(NA, T, N)
        
        ## time.dummy.mat is the categorical variable to do dummy variable regression
        ## to get the correct beta 
        for (i in 1:N){
            for (t in 1:T){
                if(t < break.point[1]){
                    time.dummy.mat[t, i] <- i
                }else if(t < break.point[2]){
                    time.dummy.mat[t, i] <- i + 0.25
                }else{
                    time.dummy.mat[t, i] <- i + .5 
                }
            }
        }
        time.dummy <- as.vector(time.dummy.mat)
        subject.offset <- c(0, T*(1:(N-1)))
        time.offset <-c(0, N*(1:(T-1)))
        
        out <- list(y, X, W, subject.id, time.id, time.dummy, subject.offset, time.offset, true.beta.time)
        names(out) <- c("y", "X", "W", "subject.id", "time.id",
                        "time.dummy", "subject.offset", "time.offset", "true.beta.time")
        
    ## }else{
    ##    stop("break must be smaller than 2!")
    }
    return(out)
}


data_gen <- function(n,p, signal.ratio = 0.7, sigma = NULL) {
  if (is.null(sigma)) {
    sigma <- diag(p)
  }

  n_signal <- round(signal.ratio * p); n_noise <- p - n_signal
  X <- MASS::mvrnorm(n, mu = runif(p, -1, 1), Sigma = diag(p))
  beta1 <- c(runif(n_signal, -3, 3), rep(0, n_noise))
  beta2 <- c(runif(n_signal, -3, 3), rep(0, n_noise))
  permuted <- sample(1:p, p, replace=FALSE)
  beta1 <- beta1[permuted]
  beta2 <- beta2[permuted]
  Y <- rep(NA, n)
  cut <- n/2
  Y[1:cut] <- X[1:cut, ] %*% beta1 + rnorm(n/2)
  Y[(cut+1):n] <- X[(cut+1):n, ] %*% beta2 + rnorm(n/2)

  return(list("X" = X, "Y" = Y, "beta1" = beta1, "beta2" = beta2, "signal" = permuted))  
}

